* Close any log that is still open (errors are suppressed when none is),
* then start a fresh log for this do-file, overwriting any previous one.
capture log close
log using "${logpath}misc_data_prep.log", replace

/*******************************************************************************
misc_data_prep.do

This code prepares miscellaneous data sets for later merging
*******************************************************************************/

**** Firm-level enforcement
* Load the raw NPI-by-firm indictment list
import delimited using "${rawdatapath}Indicted_NPIsxfirm.csv", clear

* Only rows that carry an NPI are retained
keep if !missing(npi)

* Enforcement date = the earlier of the civil and criminal dates.
* Both date strings are parsed as month/day/year; min() ignores a missing
* argument, so a row with only one of the two dates uses that date.
generate firmenforcementdate = min(date(civildate, "MDY"), date(criminaldate, "MDY"))

* Every NPI in this file is flagged as indicted
generate indictednpi = 1

* Civil indicator: 1 only when the case type is exactly "Civil".
* Any other value (including "BOTH") is coded 0, i.e. treated as criminal.
generate firmenforcementtype_civil = cond(criminalvscivil == "Civil", 1, 0)

keep npi indictednpi firmenforcementdate firmenforcementtype_civil

save "${cleandatapath}Indicted_NPIs.dta", replace

************************************************************************
**** District-level enforcement

* Load the raw district treatment dates; rows without a district are discarded
import delimited using "${rawdatapath}Treatment_Dates.csv", clear
keep if !missing(district)

* 1 = criminal row, 0 = any other case type (becomes the "civ_" columns below)
generate criminal = district_casetype == "Criminal"

* Parse the month/day/year date strings into numeric Stata dates
generate treatmentdate_num = date(treatmentdate, "MDY")
generate backupdate_num    = date(backupdate, "MDY")

* Keep only what the reshape needs (this also discards district_casetype),
* then go to one row per district with separate columns for criminal = 0 / 1
keep district criminal casecount treatmentdate_num backupdate_num
reshape wide treatmentdate_num backupdate_num casecount, i(district) j(criminal)

* Turn the 0/1 suffixes into civ_/crim_ prefixes, then strip the "_num" suffix
rename *0 civ_*
rename *1 crim_*
rename *_num *

save "${cleandatapath}DOJ_data_district.dta", replace

************************************************************************
**** Crosswalk from DOJ Districts to Counties

* Copy the raw crosswalk, unchanged, into the cleaned-data folder
use "${rawdatapath}zip_doj_crosswalk.dta", clear
save "${cleandatapath}zip_doj_crosswalk.dta", replace // Copy to cleaned

* Reduce to one row per county.
* Stata orders observations that tie within a by-group arbitrarily, so a bare
* "bysort fipscounty: keep if _n==1" could keep a different row from run to
* run whenever a county has rows with different districts. Sorting on the
* remaining variables inside each county makes the retained row deterministic;
* the result is unchanged when every county maps to a single district.
keep fipscounty fipsstate district state
bysort fipscounty (district fipsstate state): keep if _n == 1

rename (fipscounty fipsstate state) (county state state_abbrev)
save "${cleandatapath}DOJcounty_Xwalk.dta", replace

************************************************************************
**** Crosswalk from DOJ Districts to States

* Load only the rows flagged as statewide districts, and only the columns needed
use fipsstate state district statewide_district if statewide_district == 1 ///
	using "${rawdatapath}zip_doj_crosswalk.dta", clear
drop statewide_district

* Rename one at a time; "state" must be freed up before fipsstate can take its name
rename state     state_abbrev
rename fipsstate state
rename district  statedistrict

* Collapse repeated rows to one row per distinct combination
duplicates drop
save "${cleandatapath}DOJstate_Xwalk.dta", replace

************************************************************************
**** Enforcement Capacity Data

* Read the work-hours workbook, taking variable names from the first row
import excel using "${rawdatapath}WorkHours.xlsx", sheet("Sheet1") firstrow clear

* Discard the block of variables from N through X
* NOTE(review): presumably unused spreadsheet columns -- confirm against the workbook
drop N-X

rename District district
rename Year     year

* Convert the two hour counts to numeric; with "force", any entry that is
* not a number becomes missing
destring DistrictCourtCriminal DistrictCourtCivil, replace force
save "${cleandatapath}WorkHours.dta", replace


************************************************************************
**** Public Use Data

* Read each annual MUP file with every column as a string, tag it with its
* year, and park it in a temporary file
forvalues y = 2013/2017 {
	import delimited using "${rawdatapath}MUP`y'.csv", stringcols(_all) clear
	generate year = `y'
	tempfile MUP`y'
	save `MUP`y'', replace
}

* Stack the annual files, in year order, into one dataset
clear
foreach y of numlist 2013/2017 {
	append using `MUP`y''
}

* Measures that are converted to numeric, combined across duplicate rows,
* and spread wide by HCPCS code
local mup_measures tot_benes tot_srvcs tot_bene_day_srvcs avg_sbmtd_chrg avg_mdcr_alowd_amt avg_mdcr_pymt_amt avg_mdcr_stdzd_amt

* Everything was imported as string; convert the measures to numeric.
* Without "force", destring leaves a variable as string (printing only a note)
* if it contains nonnumeric characters, and the "+" used below would then
* concatenate strings instead of adding numbers. Stop here if that happened.
destring `mup_measures', replace
confirm numeric variable `mup_measures'

rename rndrng_* *

* Count rows per provider-procedure-year
bysort npi hcpcs_cd year: gen ct = _N

* Only pairs of duplicate rows are combined below; a larger group would leave
* npi-year-hcpcs_cd non-unique and break the reshape, so fail early and clearly.
assert ct <= 2

* For duplicate pairs, put the sum of the two rows into the second row.
* The data are sorted once (above) and "by" is used from here on, so the
* within-pair row order cannot change between variables.
* NOTE(review): the avg_* measures are summed as well, not averaged -- confirm
* that this is what downstream code expects.
foreach var of local mup_measures {
	by npi hcpcs_cd year: replace `var' = `var'[_n] + `var'[_n-1] if _n != 1 & ct == 2
}

* Drop the first row of each pair, keeping the row that holds the sums
by npi hcpcs_cd year: drop if _n == 1 & ct == 2
drop ct prvdr_type prvdr_cntry prvdr_mdcr_prtcptg_ind hcpcs_desc hcpcs_drug_ind place_of_srvc

* One row per provider-year, with one set of measure columns per HCPCS code
reshape wide `mup_measures', i(npi year) j(hcpcs_cd) string

save "${cleandatapath}MUP.dta", replace

* Close the log opened at the top of this do-file
log close